** Data reading and variable selection from raw data
** 2000 Canadian General Social Survey

** 01. Reading data **

* Start from a clean session: close any log left open by an earlier run
* (capture suppresses the error when none is open), empty memory, and turn
* off output paging so the script runs without pausing.
capture log close
clear all
set more off

* Fill in the two placeholders below before running.
cd /*insert your working directory here*/
use /*insert the path to your raw data file here*/


** 02. Constructing year and country variables **

* Survey year is the same constant for every record in this file.
generate year = 2000
label variable year "survey year"

* Numeric ISO country code. Canada is 124 (see "ISO Country Codes.pdf").
generate country = 124
label variable country "ISO country code"


** 03. ID variables **

* Build a numeric person identifier from the raw record id. encode assigns
* each distinct string value of recid an integer code, numbered in sort order.
encode recid, generate(pid)
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

* Sex: swap the raw string variable for an integer-coded one of the same name.
* NOTE(review): encode numbers the distinct strings in sort order, so the
* 1 "male" / 2 "female" labels below are only correct if the raw values sort
* that way. Check the tabulate output against the codebook.
encode sex, generate(sex1)
tabulate sex1
drop sex
rename sex1 sex
label define sex 1 "male" 2 "female"
label values sex sex
label variable sex "sex"

* Age: the raw variable agec is carried over under the name age.
rename agec age
label variable age "age"

* Birth year is derived as survey year minus age.
generate birthyr = year - age
label variable birthyr "Year of Birth"


** 05. Siblings **

* The raw sibling count no_flbrs is carried over unchanged under the name nsibs.
* NOTE(review): an earlier note in this section said the raw measure includes
* the respondent and that one is subtracted, but no subtraction is performed
* anywhere in this file. Confirm against the codebook whether nsibs needs to
* be reduced by one.
rename no_flbrs nsibs
label variable nsibs "number of birth siblings"


** 06. Own education **

* edu10 is a string in the raw file. encode converts it to integer codes,
* numbered 1..k in the sort order of the distinct raw values.
encode edu10, generate(educ_cat)

* Value labels for the education categories. The same label set is attached
* to the parents' education variables in section 07.
* NOTE(review): this label set defines codes 98 and 99, while the
* years-of-education mapping below assigns values to codes 12 and 13, which
* have no label here. If encode produced codes 1..13, then 12 and 13 are
* presumably the "Don't know" / "Not stated" rows. Confirm against a
* tabulation of the raw edu10 values and align the two.
#delimit ;

lab def educ_catL
                          01 "Doctorate/Master's/some graduate"
                          02 "Bachelor's degree"
                          03 "Diploma/certificate from community college"
                          04 "Diploma/certificate from trade/technical"
                          05 "Some university"
                          06 "Community college/CEGEP/nursing"
                          07 "Some trade/technical"
                          08 "High school diploma"
                          09 "Some secondary/high school"
                          10 "Elementary/no schooling"
                          11 "Other beyond elementary/high school"
                          98 "Don't know"
                          99 "Not stated";

#delimit cr

* The delimiter is switched back to carriage return above. Without that,
* every following command that lacks a semicolon is read as part of one
* long, invalid command.

label values educ_cat educ_catL

label variable educ_cat "highest level of education completed"

* Convert each education category to a number of years of schooling.
* Categories without an entry below stay missing.
generate educ_yrs = .
replace educ_yrs = 19.573 if educ_cat == 1
replace educ_yrs = 16.015 if educ_cat == 2
replace educ_yrs = 14 if educ_cat == 3
replace educ_yrs = 13 if educ_cat == 4
replace educ_yrs = 14 if educ_cat == 5
replace educ_yrs = 13 if educ_cat == 6
replace educ_yrs = 12 if educ_cat == 7
replace educ_yrs = 12 if educ_cat == 8
replace educ_yrs = 9.083 if educ_cat == 9
replace educ_yrs = 5 if educ_cat == 10
replace educ_yrs = 10.604 if educ_cat == 11
replace educ_yrs = 9.526 if educ_cat == 12
replace educ_yrs = 11 if educ_cat == 13

label variable educ_yrs "Years of education - respondent"



** 07. Parents' education: Father and/or Mother **

* Convert the raw string variables to integer-coded categories. Mother is
* encoded before father so the variables are created in the same order as before.
encode edum10, generate(moeduc_cat)
encode eduf10, generate(faeduc_cat)

label variable faeduc_cat "father's education level"
label variable moeduc_cat "mother's education level"

* Both parents use the education value labels defined for the respondent.
label values faeduc_cat educ_catL
label values moeduc_cat educ_catL

* Years of education per category, father first and then mother. The
* conversion values are the same as those used for the respondent.
* The two-letter variable prefix (fa / mo) is taken from the parent's name.
foreach parent in father mother {
    local p = substr("`parent'", 1, 2)

    generate `p'educ_yrs = .
    replace `p'educ_yrs = 19.573 if `p'educ_cat == 1
    replace `p'educ_yrs = 16.015 if `p'educ_cat == 2
    replace `p'educ_yrs = 14 if `p'educ_cat == 3
    replace `p'educ_yrs = 13 if `p'educ_cat == 4
    replace `p'educ_yrs = 14 if `p'educ_cat == 5
    replace `p'educ_yrs = 13 if `p'educ_cat == 6
    replace `p'educ_yrs = 12 if `p'educ_cat == 7
    replace `p'educ_yrs = 12 if `p'educ_cat == 8
    replace `p'educ_yrs = 9.083 if `p'educ_cat == 9
    replace `p'educ_yrs = 5 if `p'educ_cat == 10
    replace `p'educ_yrs = 10.604 if `p'educ_cat == 11
    replace `p'educ_yrs = 9.526 if `p'educ_cat == 12
    replace `p'educ_yrs = 11 if `p'educ_cat == 13

    label variable `p'educ_yrs "Years of education - `parent'"
}


** 08. Own and parent's occupation **

* The raw occupation variables are strings. encode converts each to integer
* codes, numbered 1..k in the sort order of its distinct raw values.
encode soc91c10, generate(occ_code)

encode m15soc92, generate(moocc_code)

encode f15soc92, generate(faocc_code)


label variable occ_code "Standard Occupation Codes of respondent"

label variable moocc_code "Standard Occupation Codes of respondent of mother"

* The father's variable previously carried the mother's label text.
label variable faocc_code "Standard Occupation Codes of respondent of father"

* Value labels shared by the three occupation variables.
* NOTE(review): the label set uses codes 97/98/99 for the non-response rows,
* but encode produces consecutive codes starting at 1. Confirm that the
* encoded values line up with these labels.
#delimit ;
lab def occL
                          01 "Management occupations"
                          02 "Business, finance and administrative occupations"
                          03 "Natural and applied sciences"
                          04 "Health occupations"
                          05 "Occupations in social science, education"
                          06 "Artistic/culture/recreation/sport"
                          07 "Sales and services occupations"
                          08 "Trades, transport and equipment"
                          09 "Occupations unique to primary industry"
                          10 "Occupations unique to processing and manufacturing"
                          97 "Not asked"
                          98 "Don't know"
                          99 "Not stated";
#delimit cr

* The delimiter is switched back to carriage return after each multi-line
* label definition so the single-line commands that follow are parsed
* one per line.

label values occ_code occL

label values moocc_code occL

label values faocc_code occL

*employment status*

encode acmyrc, generate(emp_stat)

label variable emp_stat "Main activity of the respondent in the last 12 months"

* NOTE(review): as with the occupation labels, code 99 only matches if the
* encoded values actually include 99. Confirm against the raw data.
#delimit ;
lab def empL              01 "Working at a paid job or business"
                          02 "Looking for paid work"
                          03 "Going to school"
                          04 "Caring for children"
                          05 "Household work"
                          06 "Retired"
                          07 "Maternity / paternity leave"
                          08 "Long term illness"
                          09 "Other"
                          99 "Not stated";
#delimit cr

label values emp_stat empL


** 9. Tabulate the Identified Variables **

* Output produced between "log using" and "log close" is written to a
* plain-text log file, and an existing file of the same name is overwritten.
* Fill in the log file path before running.
log using /*insert your log file path here*/, replace text


** Data reading and variable selection from raw data
** 2000 Canadian General Social Survey

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs, detail

** R's Own Education **
tab1 educ_cat educ_yrs

** Parental Education **
tab1 faeduc_cat moeduc_cat faeduc_yrs moeduc_yrs

** R's Own Occupation **
tab1 occ_code faocc_code moocc_code emp_stat

log close

** 11. Keep the identified variables only

* Retain only the variables constructed in the sections above and drop all others.
keep year country pid                 ///
     sex age birthyr                  ///
     nsibs                            ///
     educ_cat educ_yrs                ///
     faeduc_cat faeduc_yrs            ///
     moeduc_cat moeduc_yrs            ///
     occ_code faocc_code moocc_code   ///
     emp_stat

	 
**Create ISCED Education Variable**

* Map the education category of the respondent, the mother and the father
* (in that order) to a numeric ISCED code. The same mapping is applied to
* each. Categories with no entry below, and missing categories, stay missing.
foreach who in educ moeduc faeduc {
    generate `who'_ISCED = .
    replace `who'_ISCED = 300 if `who'_cat == 11
    replace `who'_ISCED = 0 if `who'_cat == 10
    replace `who'_ISCED = 100 if `who'_cat == 9
    replace `who'_ISCED = 200 if `who'_cat == 8
    replace `who'_ISCED = 300 if `who'_cat == 7
    replace `who'_ISCED = 400 if `who'_cat == 6
    replace `who'_ISCED = 400 if `who'_cat == 5
    replace `who'_ISCED = 500 if `who'_cat == 4
    replace `who'_ISCED = 500 if `who'_cat == 3
    replace `who'_ISCED = 600 if `who'_cat == 2
    replace `who'_ISCED = 750 if `who'_cat == 1
}


** 12. Save the Data File **

* Write the dataset with saveold (an older Stata file format), overwriting
* any existing file. Fill in the output file path before running.
saveold /*insert your output file path here*/, replace

